#### Meta Data ####

# article: "The Ambivalent Effect of Autocratization on Domestic Terrorism" 
# journal: "Studies in Conflict & Terrorism"
# authors: "Lott, Lars; Croissant, Aurel; Trinn. Christoph"
# date: 2023-10-02
# written under "R version 4.3.1 (2023-06-16 ucrt)"

#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# loading packages

library(countrycode)
library(tidyverse)
library(viridis)
library(ggpubr)
library(texreg)
library(sjPlot)
library(ggeffects)
library(readstata13)
library(here)
#library(ERT)
library(imputeTS)
library(stargazer)


# new functions for data wrangling 

min.new <- function(v) {
  if (all(is.na(v))) { return(NA) } else { return(min(v, na.rm=T)) }
} 

max.new <- function(v) {
  if (all(is.na(v))) { return(NA) } else { return(max(v, na.rm=T)) }
}

mean.new <- function(v) {
  if (all(is.na(v))) { return(NA) } else { return(mean(v, na.rm=T)) }
}

sum.new <- function(v) {
  if (all(is.na(v))) { return(NA) } else { return(sum(v, na.rm=T)) }
}


#### Import Data ####

# Load VDem data
vdem12 <- readRDS("data/vdem12/Country_Year_V-Dem_Full+others_R_v12/V-Dem-CY-Full+Others-v12.rds") 
vdem12$cowcode <- vdem12$COWcode


#Global Terrorism Database
gtd_data <- readxl::read_excel("data/GTD/globalterrorismdb_0522dist.xlsx")
head(gtd_data)

#### Data Preparation of Global Terrorism Database ####

gtd_data_cy <- gtd_data %>%
  dplyr::select(eventid, iyear, imonth, iday, country, country_txt, region, region_txt, city, latitude, longitude, doubtterr, 
         specificity, attacktype1, attacktype2, attacktype3, success, suicide, targtype1, targsubtype1, gname, nperps, 
         nkill, nkillter, nkillus, INT_LOG, INT_IDEO, INT_MISC, INT_ANY)

#### Delete observations with "Doubt Terrorism Proper?" ####

gtd_data_cy <- gtd_data_cy %>%
  filter(doubtterr==0 | doubtterr== -9) # 0 = there is essentially no doubt as to whether the incident is an act of terrorism, 
                                        # -9 = variable is not included in the database before 1997

## Generate Country-Year Measures of GTD ####

## Number Terrorist Attacks Domestic ##

gtd_data_cy_dom <- gtd_data_cy %>%
  filter(INT_ANY==0) %>%
  group_by(country_txt, iyear) %>% # group by country and year 
  summarize(count_attacks_dom = n()) # count_attacks is variable of how many attacks occurred in a given year in a country

## Add COWCode to GDT Data 
gtd_data_cy_dom$cowcode <- countrycode(gtd_data_cy_dom$country_txt, "country.name", "cown", warn = TRUE)
gtd_data_cy_dom$year <- gtd_data_cy_dom$iyear

gtd_data_cy_dom$cowcode[gtd_data_cy_dom$country_txt == "West Bank and Gaza Strip"] <- 1020

gtd_data_cy_dom <- gtd_data_cy_dom %>%
  ungroup() %>%
  dplyr::select(cowcode, year, count_attacks_dom)

## Number Terrorist Attacks Transnational ##

gtd_data_cy_int <- gtd_data_cy %>%
  filter(INT_ANY==1) %>%
  group_by(country_txt, iyear) %>% # group by country and year 
  summarize(count_attacks_int = n()) # count_attacks is variable of how many attacks occurred in a given year in a country

## Add COWCode to GDT Data 
gtd_data_cy_int$cowcode <- countrycode(gtd_data_cy_int$country_txt, "country.name", "cown", warn = TRUE)
gtd_data_cy_int$year <- gtd_data_cy_int$iyear

## correct cowcodes ##

gtd_data_cy_int$cowcode[gtd_data_cy_int$country_txt == "Hong Kong"] <- 715
gtd_data_cy_int$cowcode[gtd_data_cy_int$country_txt == "Serbia"] <- 345
gtd_data_cy_int$cowcode[gtd_data_cy_int$country_txt == "Serbia-Montenegro"] <- 341
gtd_data_cy_int$cowcode[gtd_data_cy_int$country_txt == "West Bank and Gaza Strip"] <- 1020

gtd_data_cy_int <- gtd_data_cy_int %>%
  ungroup() %>%
  dplyr::select(cowcode, year, count_attacks_int)

## Number Terrorist Attacks Domestic + transnational ##

gtd_data_cy_attacks <- gtd_data_cy %>%
  group_by(country_txt, iyear) %>% # group by country and year 
  summarize(count_attacks = n()) # count_attacks is variable of how many attacks occurred in a given year in a country

## Add COWCode to GDT Data 
gtd_data_cy_attacks$cowcode <- countrycode(gtd_data_cy_attacks$country_txt, "country.name", "cown", warn = TRUE)
gtd_data_cy_attacks$year <- gtd_data_cy_attacks$iyear

gtd_data_cy_attacks$cowcode[gtd_data_cy_attacks$country_txt == "Hong Kong"] <- 715
gtd_data_cy_attacks$cowcode[gtd_data_cy_attacks$country_txt == "Serbia"] <- 345
gtd_data_cy_attacks$cowcode[gtd_data_cy_attacks$country_txt == "Serbia-Montenegro"] <- 341
gtd_data_cy_attacks$cowcode[gtd_data_cy_attacks$country_txt == "West Bank and Gaza Strip"] <- 1020

gtd_data_cy_attacks <- gtd_data_cy_attacks %>%
  ungroup() %>%
  dplyr::select(cowcode, year, count_attacks)

## Fatalities Domestic Terrorist Attacks ##

gtd_data_cy_fatalities_dom <- gtd_data_cy %>%
  filter(INT_ANY==0) %>%
  group_by(country_txt, iyear) %>% # group by country and year 
  summarize(sum_nkill_dom = sum.new(nkill)) # count_attacks is variable of how many attacks occurred in a given year in a country

## Add COWCode to GDT Data 
gtd_data_cy_fatalities_dom$cowcode <- countrycode(gtd_data_cy_fatalities_dom$country_txt, "country.name", "cown", warn = TRUE)
gtd_data_cy_fatalities_dom$year <- gtd_data_cy_fatalities_dom$iyear

gtd_data_cy_fatalities_dom$cowcode[gtd_data_cy_fatalities_dom$country_txt == "West Bank and Gaza Strip"] <- 1020

gtd_data_cy_fatalities_dom <- gtd_data_cy_fatalities_dom %>%
  ungroup() %>%
  dplyr::select(cowcode, year, sum_nkill_dom)

## Fatalities Transnational Terrorist Attacks ##

gtd_data_cy_fatalities_int <- gtd_data_cy %>%
  filter(INT_ANY==1) %>%
  group_by(country_txt, iyear) %>% # group by country and year 
  summarize(sum_nkill_int = sum.new(nkill)) # count_attacks is variable of how many attacks occurred in a given year in a country

## Add COWCode to GDT Data 
gtd_data_cy_fatalities_int$cowcode <- countrycode(gtd_data_cy_fatalities_int$country_txt, "country.name", "cown", warn = TRUE)
gtd_data_cy_fatalities_int$year <- gtd_data_cy_fatalities_int$iyear

gtd_data_cy_fatalities_int$cowcode[gtd_data_cy_fatalities_int$country_txt == "Hong Kong"] <- 715
gtd_data_cy_fatalities_int$cowcode[gtd_data_cy_fatalities_int$country_txt == "Serbia"] <- 345
gtd_data_cy_fatalities_int$cowcode[gtd_data_cy_fatalities_int$country_txt == "Serbia-Montenegro"] <- 341
gtd_data_cy_fatalities_int$cowcode[gtd_data_cy_fatalities_int$country_txt == "West Bank and Gaza Strip"] <- 1020

gtd_data_cy_fatalities_int <- gtd_data_cy_fatalities_int %>%
  ungroup() %>%
  dplyr::select(cowcode, year, sum_nkill_int)

## Fatalities Domestic + Transnational Terrorist Attacks ##

gtd_data_cy_fatalities <- gtd_data_cy %>%
  group_by(country_txt, iyear) %>% # group by country and year 
  summarize(sum_nkill = sum.new(nkill)) # count_attacks is variable of how many attacks occured in a given year in a country

## Add COWCode to GDT Data 
gtd_data_cy_fatalities$cowcode <- countrycode(gtd_data_cy_fatalities$country_txt, "country.name", "cown", warn = TRUE)
gtd_data_cy_fatalities$year <- gtd_data_cy_fatalities$iyear

gtd_data_cy_fatalities$cowcode[gtd_data_cy_fatalities$country_txt == "Hong Kong"] <- 715
gtd_data_cy_fatalities$cowcode[gtd_data_cy_fatalities$country_txt == "Serbia"] <- 345
gtd_data_cy_fatalities$cowcode[gtd_data_cy_fatalities$country_txt == "Serbia-Montenegro"] <- 341
gtd_data_cy_fatalities$cowcode[gtd_data_cy_fatalities$country_txt == "West Bank and Gaza Strip"] <- 1020

gtd_data_cy_fatalities <- gtd_data_cy_fatalities %>%
  ungroup() %>%
  dplyr::select(cowcode, year, sum_nkill)



##############################################################################################################
##############################################################################################################

# start inclusion: 0.05 
# cum inclusion: 0.1
# year turn: 0.03
# cum turn: 0.1
# tolerance: 5 years

ert_data <- ERT::get_eps(data = ERT::vdem,
                    start_incl = 0.05,
                    cum_incl = 0.1,
                    year_turn = 0.03,    
                    cum_turn = 0.1,
                    tolerance = 5)


ert_data <- ert_data %>%
  group_by(dem_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_increase = last_v2x_polyarchy - first_v2x_polyarchy)


ert_data <- ert_data %>%
  group_by(aut_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_decrease = last_v2x_polyarchy - first_v2x_polyarchy) %>%
  dplyr::select(-c(last_v2x_polyarchy, first_v2x_polyarchy))

ert_data <- ert_data %>%
  mutate(v2x_polyarchy_increase = ifelse(is.na(dem_ep_id), NA,v2x_polyarchy_increase )) %>%
  mutate(v2x_polyarchy_decrease = ifelse(is.na(aut_ep_id), NA,v2x_polyarchy_decrease ))

ert_data <- ert_data %>%
  dplyr::select(-c(country_text_id, country_name, v2x_regime:v2x_polyarchy_codehigh))

vdem12 <- vdem12 %>%
  left_join(ert_data, by =c("country_id", "year"))

table(vdem12$aut_ep_outcome)
table(vdem12$aut_ep_outcome_agg)


#########################################################################################################
###########################################################################################################

#### Merge Data with V-DEM Data ####

vdem12 <- vdem12 %>%
  left_join(gtd_data_cy_dom, c("cowcode", "year")) %>%
  left_join(gtd_data_cy_int, c("cowcode", "year")) %>%
  left_join(gtd_data_cy_attacks, c("cowcode", "year")) %>%
  left_join(gtd_data_cy_fatalities_dom, c("cowcode", "year")) %>%
  left_join(gtd_data_cy_fatalities_int, c("cowcode", "year")) %>%
  left_join(gtd_data_cy_fatalities, c("cowcode", "year")) 

  
vdem12 <- distinct(vdem12, country_id, year, .keep_all= TRUE) # Distinct observations


vdem12 <- vdem12 %>%
  mutate(count_attacks = ifelse(is.na(count_attacks), 0, count_attacks), 
            count_attacks_int = ifelse(is.na(count_attacks_int), 0, count_attacks_int), 
            count_attacks_dom = ifelse(is.na(count_attacks_dom), 0, count_attacks_dom), 
            sum_nkill = ifelse(is.na(sum_nkill), 0, sum_nkill), 
            sum_nkill_dom = ifelse(is.na(sum_nkill_dom), 0, sum_nkill_dom), 
            sum_nkill_int = ifelse(is.na(sum_nkill_int), 0, sum_nkill_int)) %>%
  mutate(count_attacks = ifelse(year == 1993 | year <1970, NA, count_attacks), 
         count_attacks_int = ifelse(year == 1993  | year <1970, NA,  count_attacks_int), 
         count_attacks_dom = ifelse(year == 1993  | year <1970, NA, count_attacks_dom), 
         sum_nkill = ifelse(year == 1993  | year <1970, NA,  sum_nkill), 
         sum_nkill_dom = ifelse(year == 1993  | year <1970, NA,  sum_nkill_dom), 
         sum_nkill_int = ifelse(year == 1993  | year <1970, NA,  sum_nkill_int)) 

#### Ethnic Fractionalization and Excluded Population ####

# load data
eprlong <- read.csv("data/epr/EPR-2018.1.1.csv", as.is = T) %>%
  rename(country = statename) %>%
  arrange(country)


# recode differing country names
eprlong$country[eprlong$country == "Czech Republic"] <- "Czechia"
eprlong$country[eprlong$country == "Republic of Korea"] <- "South Korea"
eprlong$country[eprlong$country == "Serbia" | 
                  eprlong$country == "Serbia and Montenegro"] <- "Yugoslavia/Serbia"

# transform into country-year data
eprlongcy <- eprlong %>% 
  rowwise() %>%
  do(data.frame(country = .$country,
                from = .$from,
                to = .$to,
                year = seq(.$from, .$to, by = 1),
                group = .$group,
                gwgroupid =.$gwgroupid,
                umbrella = .$umbrella,
                size = .$size,
                status = .$status)) %>%
  arrange(country, year)

# reduce dataset
epr <- eprlongcy %>%
  filter(year >= 1964)

# add country-year variable
epr$countryyear <- paste(epr$country, epr$year)

# create exclusion dummy variable
epr <- epr %>%
  mutate(exclusion = if_else(status == "DISCRIMINATED"  | 
                               status == "POWERLESS" |
                               status == "SELF-EXCLUSION", 1, 0))

# create exclusion ratio variable
epr <- epr %>%
  group_by(country, year) %>%
  summarise(eprratio = sum(size[exclusion == 1]))

epr$cowcode <- countrycode(epr$country, "country.name", "cown", warn = TRUE)

epr <- epr %>%
  ungroup()%>%
  dplyr::select(cowcode, year, eprratio)

vdem12 <- vdem12 %>%
  left_join(epr, by = c("cowcode", "year"))

rm(epr, eprlong, eprlongcy)

### Extrapolate Data ? ####

summary(vdem12$eprratio)

vdem12 %>% filter(year==2017) %>% # 2018, 2019 no data 
  summarize(mean = mean.new(eprratio)) 

vdem12 <- group_by(vdem12, country_id)
vdem12$eprratio_extrapol <- na_interpolation(vdem12$eprratio)

vdem12 <- ungroup(vdem12)

vdem12 %>% filter(year==2019) %>%
  summarize(mean = mean.new(eprratio_extrapol))

summary(vdem12$eprratio_extrapol)


#### Military Personell Data ####

## Correlates of War National Material Capabilities Dataset ##

cow <- read.dta13("data/cow/NMC_5_0.dta")

summary(cow$milper)

cow <- cow %>%
  mutate(milper = ifelse(milper <0, NA, milper)) %>%
  rename(cown = ccode) %>%
  dplyr::select(cown, year, milper, stateabb)

summary(cow$milper)

vdem12 <- vdem12 %>%
  rename(cown = COWcode)

vdem12 <- vdem12 %>%
  left_join(cow, c("cown", "year"))

summary(vdem12$milper)

vdem12 %>% filter(year==2012) %>%
  summarize(mean = mean.new(milper)) # last observation ins 2012 -> extrapolate military personnel variable

vdem12 <- group_by(vdem12, country_id)
vdem12$milper_extrapol <- na_interpolation(vdem12$milper)

vdem12 <- ungroup(vdem12)

vdem12 %>% filter(year==2020) %>%
  summarize(mean = mean.new(milper_extrapol)) # last observation ins 2012 -> extrapolate military personnel variable


#### GDP Data and Population Data from Fariss et al. (2017) ####

# GDP per capita ##
summary(vdem12$e_gdppc)


ggplot(vdem12, aes( x= e_gdppc)) +
  geom_density() 

## Population Data ##

summary(vdem12$e_pop)


#### UCDP/PRIO Armed Conflict Dataset version 20.1  ####

load("data/ucdp/ucdp-prio-acd-201.RData")

ucdp_prio_acd_201 <- ucdp_prio_acd_201 %>%
  filter(type_of_conflict >2)

ucdp_prio_acd_201$cowcode <-countrycode(ucdp_prio_acd_201$location, "country.name", "cown", warn = TRUE) 

ucdp_prio_acd_201 <- ucdp_prio_acd_201 %>%
  mutate(civil_war_ucdp = 1) %>%
  dplyr::select(location, cowcode, year, civil_war_ucdp, intensity_level)

vdem12 <- vdem12 %>%
  left_join(ucdp_prio_acd_201, c("cowcode", "year"))

summary(vdem12$civil_war_ucdp)

vdem12$civil_war_ucdp <- replace(vdem12$civil_war_ucdp, is.na(vdem12$civil_war_ucdp), 0)

summary(vdem12$civil_war_ucdp)

vdem12 <- distinct(vdem12, country_id, year, .keep_all= TRUE) # Distinct observations

#### Globalization Index ####

kof <- read.dta13("data/kof/KOFGI_2021_public.dta")

kof_subset <- kof %>%
  dplyr::select(code, country, year, KOFPoGIdf, KOFEcGIdf)

kof_subset$cowcode <-countrycode(kof_subset$country, "country.name", "cown", warn = TRUE) 

kof_subset$cowcode[kof_subset$country == "Hong Kong SAR, China"] <- 997
kof_subset$cowcode[kof_subset$country == "Serbia"] <- 345

vdem12 <- vdem12 %>%
  left_join(kof_subset, c("cowcode", "year"))

vdem12 <- distinct(vdem12, country_id, year, .keep_all= TRUE) # Distinct observations

summary(vdem12$KOFPoGIdf)
summary(vdem12$KOFEcGIdf)


#############################################################################################################
#############################################################################################################

#### 1. Democracy Stock Variable ####

annual_depricition_function  <- function(year, var, decay) {
  last_time <- first(year)
  current_sum <- 0
  sums <- numeric(length(var))
  ldecay <- (1-decay)
  for (i in 1:length(var)) {
    delta <- as.numeric(year[i] - last_time)
    current_sum <- ldecay * (current_sum + decay * var[i])
    last_time <- year[i]
    sums[i] <- current_sum
  }
  sums
}


## Functions ##

na_interpolation2 <- function(x, option = "linear", maxgap) {
  library(imputeTS)
  library(dplyr)
  library(zoo)
  
  total_not_missing <- sum(!is.na(x))
  
  # check there is sufficient data for na_interpolation 
  if(total_not_missing < 2) {x} 
  
  else
    
    # replace takes an input vector, a T/F vector & replacement value
  {replace(
    # input vector is interpolated data
    # this will impute leading/lagging NAs which we don't want 
    imputeTS::na_interpolation(x, option = option, maxgap = maxgap), 
    
    # create T/F vector for NAs,  
    is.na(na.approx(x, na.rm = FALSE)), 
    
    # replace TRUE with NA in input vector  
    NA) 
  }
}


vdem12_democratic_stock_ds <-vdem12 %>%
  dplyr::select(country_id, year, v2x_polyarchy) %>%
  group_by(country_id) %>%
  mutate(v2x_polyarchy_interpolated = na_interpolation2(v2x_polyarchy, option = "linear", maxgap = 5)) %>%
  drop_na(v2x_polyarchy_interpolated) %>%
  mutate(democracy_stock = annual_depricition_function(year, v2x_polyarchy_interpolated, 0.05)) %>%
  dplyr::select(-v2x_polyarchy)

vdem12 <- vdem12 %>%
  left_join(vdem12_democratic_stock_ds, by = c("country_id", "year"))

rm(vdem12_democratic_stock_ds)

library(ggthemes)

ggplot(subset(vdem12, country_name %in% c("United States of America", "Germany", "Poland", 
                                                   "China", "Russia", "South Africa")), aes(x = year)) +
  geom_line(aes(y = v2x_polyarchy), color = "red") +
  geom_line(aes(y = democracy_stock), color = "blue") +
  facet_wrap(~country_name, ncol = 2) +
  ylim(0,1) +
  theme_clean()

#### 2. Moving Average Domestic Attacks and Domestic Fatalities ####

vdem12 <- vdem12 %>%
  group_by(country_id) %>%
  mutate(ma_count_attacks_dom = rollmean(count_attacks_dom, k = 5, fill = "TRUE",  align = "right") , 
         ma_sum_nkill_dom = rollmean(sum_nkill_dom, k = 5, fill = "TRUE",  align = "right"))


saveRDS(vdem12, "data/vdem12.rds")




